---
title: "Threshold calibration"
output: html_notebook
---

This notebook calculates the cutoffs at each threshold for the given
training dataset and then computes the confusion-matrix counts (from which
precision and recall follow), along with the number and proportion of
cases flagged at each threshold.
These results can be used to calibrate the threshold based on the desired
precision and operating capacity.

```{r setup, include=FALSE}
library(tidyverse)
library(ggpubr)
source("../prediction/heuristic_language_prediction.R")
```

Read in the training and test data and derive the candidate thresholds.
```{r}
# Training split of L2, carrying the four *_bin columns and the 0/1
# `is_spanish` label used below.
binned_train_df <- read_rds("../data/binned_train_df_l2.rds")

# Proportion of `is_spanish` rows within every observed combination of the
# four bins. The result is explicitly ungrouped so that later operations on
# `bins_by_perc` are not silently applied per group.
bins_by_perc <- binned_train_df %>%
  group_by(
    age_bin,
    addr_score_bin,
    first_name_score_bin,
    last_name_score_bin
  ) %>%
  summarize(
    perc_spanish = mean(is_spanish)
  ) %>%
  ungroup()

# Candidate thresholds: every distinct per-bin proportion, highest first.
# `na.last = TRUE` keeps any missing proportion at the end instead of
# dropping it, matching an `order()`-based sort.
thresholds <- sort(
  unique(bins_by_perc$perc_spanish),
  decreasing = TRUE,
  na.last = TRUE
)

# This value is set to address sampling variability
# (i.e., some bins may only have a small number of individuals).
threshold_bin_count <- 10

# Held-out test split of L2, binned the same way as the training split.
binned_test_df <- read_rds("../data/binned_test_df_l2.rds")
```

Calculate the confusion-matrix counts (the inputs to precision and recall) and the flagged volume for each threshold on the test split of L2.
```{r}
# Evaluate every candidate threshold on the test split and record, per
# threshold, how many cases are flagged and the confusion-matrix counts.
#
# Result vectors are preallocated (one slot per threshold) rather than grown
# with append(), which avoids repeated copying and guarantees well-typed,
# zero-length vectors when `thresholds` is empty.
n_thresholds <- length(thresholds)
num_flagged_l2 <- integer(n_thresholds)
proportion_flagged_l2 <- numeric(n_thresholds)
tp_l2 <- integer(n_thresholds)
fp_l2 <- integer(n_thresholds)
tn_l2 <- integer(n_thresholds)
fn_l2 <- integer(n_thresholds)

# Columns that identify a bin; used to attach each bin's cutoff to test rows.
bin_cols <- c(
  "age_bin",
  "addr_score_bin",
  "first_name_score_bin",
  "last_name_score_bin"
)

start_time <- Sys.time()

for (i in seq_along(thresholds)) {
  threshold <- thresholds[[i]]
  cutoffs <- set_cutoffs(threshold, threshold_bin_count, binned_train_df)

  l2_predictions <- binned_test_df %>%
    left_join(cutoffs, by = bin_cols) %>%
    mutate(
      # `%in%` never returns NA, so a test row whose bin has no entry in
      # `cutoffs` (cutoff is NA after the left join) counts as "not flagged"
      # instead of dropping out of every confusion-matrix cell.
      # NOTE(review): assumes an unmatched bin should be treated as "Out" --
      # confirm against set_cutoffs().
      pred = ifelse(cutoff %in% "In", 1, 0)
    )

  # Logical masks computed once and reused for all counts.
  flagged <- l2_predictions$pred == 1
  actual_pos <- l2_predictions$is_spanish == 1
  actual_neg <- l2_predictions$is_spanish == 0

  num_flagged_l2[i] <- sum(flagged)
  proportion_flagged_l2[i] <- sum(flagged) / nrow(l2_predictions)

  # na.rm = TRUE: rows with a missing `is_spanish` label are excluded from
  # the confusion matrix, as they would be by filter().
  tp_l2[i] <- sum(actual_pos & flagged, na.rm = TRUE)
  tn_l2[i] <- sum(actual_neg & !flagged, na.rm = TRUE)
  fp_l2[i] <- sum(actual_neg & flagged, na.rm = TRUE)
  fn_l2[i] <- sum(actual_pos & !flagged, na.rm = TRUE)
}

end_time <- Sys.time()
# Elapsed wall-clock time for the full threshold sweep (printed by the chunk).
end_time - start_time
```

```{r}
# Assemble the per-threshold results into one table: one row per threshold,
# with the flagged volume and the four confusion-matrix counts. Unnamed
# vector arguments take their column names from the variable names.
threshold_precision_volume <- tibble(
  threshold = thresholds,
  num_flagged_l2,
  proportion_flagged_l2,
  tp_l2,
  fp_l2,
  tn_l2,
  fn_l2
)

# Persist the table so the calibration results can be reused without
# rerunning the threshold sweep.
write_rds(threshold_precision_volume, "../data/threshold_precision_volume_l2.rds")
```
